<?php

namespace Iwester\Services;

use App;
use Carbon\Carbon;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Storage;
use Iwester\Http\Model\Book\Book;
use Iwester\Http\Model\Book\BookAuthor;
use Iwester\Http\Model\Book\BookChapter;
use Iwester\Http\Model\Spider\SpiderArticleContent;
use Iwester\Http\Model\Spider\SpiderTask;
use Log;
use QL\QueryList;

class BookSpiderService
{
    /**
     * Intentionally empty constructor: no initialisation is performed here.
     */
    public function __construct()
    {
    }

    # Xbiquge book page rules (passed to QueryList::rules() in xbiqugeSpiderBookItem()).
    # Each entry maps a result key to [selector, attribute-or-content-type].
    # NOTE: the key 'laset_time' is spelled this way on purpose of compatibility —
    # serialStatus() reads it under the same spelling, so do not rename one side only.
    public static $xbiqugeBookRules = [
        'img' => ['#fmimg img', 'src'],
        'name' => ['#info h1', 'text'],
        'author' => ['#info p:eq(0)', 'text'],
        'category' => ['.box_con .con_top>a:eq(1)', 'text'],
        'book_description' => ['#intro p:eq(1)', 'text'],
        'serial_status' => ['#fmimg span', 'class'],
        'laset_time' => ['#info p:eq(2)', 'text'],
    ];

    # Xbiquge chapter list rules (title and relative link of every chapter on the book page).
    public static $xbiqugeChaptersRules = [
        'title' => ['#list dl dd a', 'text'],
        'url' => ['#list dl dd a', 'href'],
    ];

    # Xbiquge chapter detail rules (chapter body as HTML).
    public static $xbiqugeChaptersDetailRules  = [
        'content' => ['#content', 'html'],
    ];

    # Qidian search result rules (used by qidian() to find the matching book/author).
    public static $qdSearchRules = [
        'name' => ['#result-list .res-book-item h4 a', 'text'],
        'url' => ['#result-list .res-book-item h4 a', 'href'],
        'author_name' => ['#result-list .res-book-item .book-mid-info .author .name', 'text'],
        'author_url' => ['#result-list .res-book-item .book-mid-info .author .name', 'href'],
        'category' => ['#result-list .res-book-item .book-mid-info .author', 'text'],
        'serial_status' => ['#result-list .res-book-item .book-mid-info .author span', 'text'],
    ];

    # Qidian book home page rules (used by saveQidianBookIndex()).
    # Only 'score', 'serial_status' and 'try_url' are read by the code in this class.
    public static $qdBookIndexRules = [
        'score' => ['#j_bookScore', 'text'],
        'serial_status' => ['.book-info p.tag span.blue:eq(0)', 'text'],
        'total_recommend' => ['.book-info p:eq(2) em:eq(1)', 'html'],
        'total_recommend1' => ['.book-info p:eq(2) cite:eq(1)', 'text'],
        'week_recommend' => ['.book-info p:eq(2) em:eq(2)', 'html'],
        'week_recommend1' => ['.book-info p:eq(2) cite:eq(2)', 'text'],
        'try_url' => ['#readBtn', 'href'],
    ];

    # Qidian book detail rules; applied to the page behind 'try_url' in saveQidianBookIndex().
    public static $qdBookDetailRules = [
        'issued_time' => ['.info-list ul li:eq(1) em', 'text'],
    ];

    # Qidian author page rules (applied to the author page fetched in qidian()).
    public static $qdBookAuthorRules = [
        'description' => ['.header-msg-desc', 'text'],
        'write_days' => ['.header-msg-data .ml12:eq(1) .header-msg-strong', 'text'],
    ];

    /**
     * Scheduled book crawl entry point.
     *
     * Walks the xbiquge "all novels" catalogue and stores the books found
     * there; chapter crawling is switched off for this run.
     */
    public function spider()
    {
        // Single-book debugging call, kept for reference:
        // $this->xbiqugeSpiderBookItem('http://www.xbiquge.la/15/15409/', false);
        $catalogueUrl = 'http://www.xbiquge.la/xiaoshuodaquan/'; // full novel index
        $withChapters = false;

        $this->spiderBook($catalogueUrl, $withChapters);
    }

    /**
     * Fetch a Qidian page with browser-like request headers.
     *
     * @param string $url Page to request; also sent as the Referer header.
     * @return mixed Whatever HttpJsonApi::CurlHttp() returns; callers in this
     *               class hand it to QueryList::html() as markup.
     */
    public function qiDianHtml($url)
    {
        $requestHeaders = [
            'Referer' => $url,
            'Accept' => "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            'Host' => 'www.qidian.com',
            'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0',
            'Cookie' => '',
        ];
        $client = new App\Common\HttpJsonApi();

        // GET request without a payload.
        return $client->CurlHttp($url, [], 'GET', $requestHeaders);
    }

    /**
     * Refresh books that are still being serialised (serial_status = 0) and
     * whose record has not been updated for more than six hours.
     *
     * At most one book is processed per invocation (limit(1)). For that book
     * the source page is re-fetched, missing chapters are stored, and — when a
     * Qidian source URL is known — the Qidian-derived fields are refreshed.
     *
     * Failures are logged together with their reason and never propagate, so a
     * scheduler calling this method is not interrupted by a single bad book.
     */
    public function updateBookChapter(){
        set_time_limit(0);
        ini_set('memory_limit', -1);
        $books = Book::where('serial_status', 0)->where('updated_at', '<', Carbon::now()->addHour(-6))->limit(1)->get();
        foreach ($books as $book){
            try{
                $html = SpiderService::getHtml($book->source);
                # Store chapters that are not in the database yet
                $this->checkSaveChapterList($html, $book);
                # Refresh the fields sourced from Qidian
                if ($book->qd_source!=''){
                    $book = $this->saveQidianBookIndex($book->qd_source, $book);
                    $book->save();
                }
            }catch (\Throwable $e){
                // \Throwable also covers TypeError & co.; the reason is logged
                // so that a failing book can actually be diagnosed.
                \Log::info('定时更新书籍失败  bookID= '.$book->id.' error='.$e->getMessage());
            }
        }
    }

    /**
     * Crawl every book linked from a catalogue page.
     *
     * Books whose source URL is already stored are skipped. After each newly
     * crawled book the method pauses for a random 1-5 seconds.
     *
     * @param string $url           Catalogue page URL.
     * @param bool   $spiderChapter Forwarded to xbiqugeSpiderBookItem().
     */
    public function spiderBook($url, $spiderChapter = true)
    {
        $catalogueHtml = SpiderService::getHtml($url);
        $catalogueRules = [
            'url' => ['#main .novellist ul li a', 'href'],
            'title' => ['#main .novellist ul li a', 'text'],
        ];
        $entries = QueryList::html($catalogueHtml)->rules($catalogueRules)->query()->getData();
        if (!$entries) {
            return;
        }

        foreach ($entries as $entry) {
            $bookUrl = $entry['url'];
            $alreadyStored = Book::where('source', $bookUrl)->first();
            if ($alreadyStored) {
                continue;
            }

            $this->xbiqugeSpiderBookItem($bookUrl, $spiderChapter);
            dump('书籍存储完成:'.$bookUrl);
            // Random pause between books.
            sleep(rand(1, 5));
        }
    }

    /**
     * Crawl a single xbiquge book page: author, book record and, optionally,
     * its chapters.
     *
     * @param string $url           Book page URL; stored as the book's source.
     * @param bool   $spiderChapter When true the chapter list is crawled too.
     */
    public function xbiqugeSpiderBookItem($url, $spiderChapter = true)
    {
        $html = SpiderService::getHtml($url);
        $parsed = QueryList::html($html)->rules(self::$xbiqugeBookRules)->query()->getData();
        if (!$parsed || !$parsed->first()) {
            return;
        }
        $bookData = $parsed->first();

        // Strip the label prefix from the author line, then find or create the author.
        $authorName = str_replace('作    者：', '', $bookData['author']);
        $author = $this->checkAuthorSave($authorName);

        // The book is identified by name + author inside saveBook().
        $book = $this->saveBook($bookData, $author, $url);

        if ($spiderChapter) {
            $this->checkSaveChapterList($html, $book);
        }
    }

    /**
     * Store every chapter of a book that is not in the database yet.
     *
     * Chapters are matched by (book_id, title). Each missing chapter page is
     * fetched and saved, with a random 1-5 second pause after every fetch.
     * All writes happen inside one database transaction; on any failure the
     * transaction is rolled back and the error message is dumped. The failure
     * is not re-thrown, so callers continue with their own work.
     *
     * @param string $html Book page HTML containing the chapter list.
     * @param Book   $book Book the chapters belong to.
     */
    public function checkSaveChapterList($html, $book){
        dump('start 更新章节');
        $chapters = QueryList::html($html)->rules(self::$xbiqugeChaptersRules)->query()->getData();
        if (!$chapters){
            return;
        }
        try{
            DB::beginTransaction();
            $bookFirst = 0;
            foreach ($chapters as $k=> $chapterData){
                $chapterEx = BookChapter::where('book_id', $book->id)->where('title', $chapterData['title'])->first();
                if ($chapterEx) continue;
                $chapterUrl = 'http://www.xbiquge.la'.$chapterData['url'];
                $chapterHtml = SpiderService::getHtml($chapterUrl);
                $chapterDetails = QueryList::html($chapterHtml)->rules(self::$xbiqugeChaptersDetailRules)->query()->getData();
                // getData() yields a collection object, which is always truthy;
                // the emptiness check has to look at its first row instead.
                $chapterDetail = $chapterDetails ? $chapterDetails->first() : null;
                if ($chapterDetail){
                    $chapterEntity = $this->saveChapterData($chapterDetail, $chapterData, $book, $k);
                    if ($k == 0) $bookFirst = $chapterEntity->id;
                }
                sleep(rand(1,5));
            }
            if ($book->first_chapter_id ==0) $book->first_chapter_id = $bookFirst;
            $book->book_words_num = BookChapter::where('book_id', $book->id)->sum('word_num');
            $book->save();
            DB::commit();
        }catch (\Throwable $e){
            // \Throwable so that engine errors cannot leave the transaction open.
            DB::rollBack();
            // Report the cause rather than echoing the whole page markup.
            dump('更新章节失败 --'.$e->getMessage());
        }
    }

    /**
     * Append one chapter to a book and maintain the chapter chain.
     *
     * The new chapter is numbered after the book's current last chapter (or 1
     * when there is none), linked to it through pre_chapter_id /
     * next_chapter_id, and then becomes the book's last chapter.
     *
     * @param array $chapterDetail Extracted chapter page data; 'content' is read.
     * @param array $chapterData   Chapter list entry; 'title' is read.
     * @param Book  $book          Book receiving the chapter (saved by this method).
     * @param mixed $index         Position in the chapter list; currently unused.
     * @return BookChapter The stored chapter.
     * @throws \Exception When any step fails; the original exception is
     *                    attached as the previous exception.
     */
    public function saveChapterData($chapterDetail, $chapterData, $book, $index){
        try{
            $bookLastchapter = BookChapter::find($book->last_chapter_id);
            $bookChapter = new BookChapter();
            $bookChapter->book_id = $book->id;
            $bookChapter->chapter_num = $bookLastchapter ? ($bookLastchapter->chapter_num + 1) : 1;
            $bookChapter->pre_chapter_id = $book->last_chapter_id;
            $bookChapter->title = $chapterData['title'];
            $bookChapter->content = $chapterDetail['content'];
            $bookChapter->word_num = $this->word_num($chapterDetail['content']);
            $bookChapter->save();
            if ($bookLastchapter){
                $bookLastchapter->next_chapter_id = $bookChapter->id;
                $bookLastchapter->save();
            }
            $book->last_chapter_id = $bookChapter->id;
            $book->last_update_time = date('Y-m-d H:i:s');
            $book->save();
            dump('chapter save'.$bookChapter->id);
            return $bookChapter;
        }catch (\Exception $e){
            // Keep the original exception as "previous" so its type and stack
            // trace are not lost when the error is reported upstream.
            throw new \Exception($e->getMessage(), 0, $e);
        }
    }

    /**
     * Find a book by name and author, creating it when it does not exist.
     *
     * For a new book the cover image is stored, Qidian data is merged in via
     * qidian(), and the record is saved with serial_status 0 and no chapters.
     *
     * @param array      $bookData Extracted book page data (img, name, category, book_description).
     * @param BookAuthor $author   Author record the book belongs to.
     * @param string     $url      Source page URL of the book.
     * @return Book The existing or newly created book.
     */
    public function saveBook($bookData, $author, $url)
    {
        $existing = Book::where('name', $bookData['name'])->where('book_author_id', $author->id)->first();
        if ($existing) {
            return $existing;
        }

        $coverPath = FunctionService::storeImage($bookData['img'], '.jpg', 'public_book');

        // qidian() may pre-fill the category and other fields on the new record.
        $created = $this->qidian(new Book(), $bookData['name'], $author);
        $created->name = $bookData['name'];
        $created->img = $coverPath;
        $created->first_chapter_id = 0;
        $created->last_chapter_id = 0;
        $created->book_author_id = $author->id;
        if (!$created->book_category_id) {
            // Fall back to the category shown on the source site.
            $created->book_category_id = $this->bookCate($bookData['category']);
        }
        $created->book_description = $bookData['book_description'];
        dump('新增书籍：'.$bookData['name']);
        $created->source = $url;
        // A freshly initialised book has no chapters yet and is always marked
        // as serialising; serialStatus() is deliberately not consulted here.
        $created->serial_status = 0;
        $created->save();

        return $created;
    }

    /**
     * Enrich a book (and its author) with data from Qidian.
     *
     * Searches Qidian for the book name and takes the first result whose name
     * and author name both match. For that result the Qidian URL and category
     * are set on the book, the book home page data is merged in through
     * saveQidianBookIndex(), and the author's description / write_days are
     * updated from the author page and saved. Random 1-3 second pauses are
     * inserted between the requests. Without a match the book is returned
     * untouched.
     *
     * @param Book       $book     Book to enrich (not saved here).
     * @param string     $bookName Book name used as search keyword.
     * @param BookAuthor $author   Author used for matching; saved when author data is found.
     * @return mixed The same $book instance.
     */
    public function qidian($book, $bookName, $author){
        $url = 'https://www.qidian.com/search?kw=+'.urlencode($bookName);
        $html = $this->qiDianHtml($url);
        $bookDatas = QueryList::html($html)->rules(self::$qdSearchRules)->query()->getData();
        if (!$bookDatas){
            return $book;
        }
        foreach ($bookDatas as $bookData){
            if ($bookData['name'] != $bookName || $author->author_name != $bookData['author_name']){
                continue;
            }
            $qidianUrl = 'https:'.$bookData['url'];
            $book->qd_source = $qidianUrl;
            // The category is the second '|'-separated segment of the author
            // line. Guard against a missing separator and strip surrounding
            // whitespace so the lookup in qidianBookCate() can match.
            $categoryParts = explode('|', (string) $bookData['category']);
            $cate = trim($categoryParts[1] ?? '');
            $book->book_category_id = $this->qidianBookCate($cate);
            # Book home page data
            sleep(rand(1,3));
            $book = $this->saveQidianBookIndex($qidianUrl, $book);
            # Author home page data
            sleep(rand(1,3));
            $author_url = 'https:'.$bookData['author_url'];
            $authorHtml = $this->qiDianHtml($author_url);
            $qidianAuthorData = QueryList::html($authorHtml)->rules(self::$qdBookAuthorRules)->query()->getData();
            if ($qidianAuthorData && $qidianAuthorData->first()){
                $qidianAuthorData = $qidianAuthorData->first();
                if (isset($qidianAuthorData['description'])){
                    $author->description = $qidianAuthorData['description'];
                }
                // Numeric strings keep their int/float value; anything else is
                // cast, so empty or decorated text no longer raises an error
                // in arithmetic context.
                $writeDays = $qidianAuthorData['write_days'] ?? 0;
                $author->write_days = is_numeric($writeDays) ? $writeDays * 1 : (int) $writeDays;
                $author->save();
            }
            break;
        }
        return $book;
    }

    /**
     * Copy selected Qidian book page fields onto a book.
     *
     * Sets score and serial_status from the book home page, then follows the
     * page's "read" link (after a random 1-3 second pause) to pick up
     * issued_time. The book is modified in memory only; saving is up to the
     * caller.
     *
     * @param string $qidianUrl Qidian book home page URL.
     * @param Book   $book      Book to update.
     * @return mixed The same $book instance.
     */
    public function saveQidianBookIndex($qidianUrl, $book)
    {
        $indexHtml = $this->qiDianHtml($qidianUrl);
        $indexRows = QueryList::html($indexHtml)->rules(self::$qdBookIndexRules)->query()->getData();
        if (!$indexRows || !$indexRows->first()) {
            return $book;
        }
        $index = $indexRows->first();

        if (isset($index['score'])) {
            $book->score = $index['score'] * 1;
        } else {
            $book->score = 0;
        }
        $book->serial_status = $this->qidianSerialStatus($index['serial_status']);

        $readUrl = 'https:'.$index['try_url'];
        sleep(rand(1, 3));

        $detailHtml = $this->qiDianHtml($readUrl);
        $detailRows = QueryList::html($detailHtml)->rules(self::$qdBookDetailRules)->query()->getData();
        if ($detailRows && $detailRows->first()) {
            $detail = $detailRows->first();
            $book->issued_time = $detail['issued_time'];
        }

        return $book;
    }

    /**
     * Count the characters of a chapter body.
     *
     * The content is first passed through FunctionService::h() and the
     * multibyte length of that result is returned.
     *
     * @param string $content Chapter content.
     * @return bool|int
     */
    public function word_num($content)
    {
        $cleaned = FunctionService::h($content, '', 1);

        return mb_strlen($cleaned);
    }

    /**
     * Derive the serialisation status from xbiquge book data.
     *
     * Returns 1 when the last update lies more than 100 days away from now,
     * or when the status marker is anything other than 'b'; returns 0 (still
     * serialising) only for marker 'b' with a recent update.
     *
     * @param array $bookData Extracted book data ('laset_time', 'serial_status').
     * @return int
     */
    public function serialStatus($bookData)
    {
        $lastUpdate = str_replace('最后更新：', '', $bookData['laset_time']);
        $daysSinceUpdate = Carbon::now()->diffInDays(Carbon::parse($lastUpdate));
        if ($daysSinceUpdate > 100) {
            return 1;
        }

        // Loose comparison on purpose, matching the original switch semantics.
        return $bookData['serial_status'] == 'b' ? 0 : 1;
    }

    /**
     * Map Qidian's status label to the internal serial_status value.
     *
     * @param mixed $serial_status Status text taken from the Qidian page.
     * @return int 0 for the label '连载' (serialising), 1 for anything else.
     */
    public function qidianSerialStatus($serial_status)
    {
        // Loose comparison on purpose, matching the original switch semantics.
        if ($serial_status == '连载') {
            return 0;
        }

        return 1;
    }


    /**
     * Map an xbiquge category label to an internal category id.
     *
     * @param mixed $cateName Category label from the source site.
     * @return int Category id; unknown labels map to 1.
     */
    public function bookCate($cateName)
    {
        // Labels are compared loosely (non-strict in_array), matching the
        // original switch semantics. Group order matters for that reason.
        $groups = [
            [6, ['玄幻小说', '修真小说', '穿越小说']],
            [1, ['都市小说']],
            [12, ['网游小说']],
            [2, ['科幻小说']],
        ];
        foreach ($groups as $group) {
            if (in_array($cateName, $group[1])) {
                return $group[0];
            }
        }

        return 1;
    }

    /**
     * Map a Qidian category label to an internal category id.
     *
     * @param mixed $cateName Category label from Qidian.
     * @return int Category id, or -1 when the label has no mapping
     *             (this includes '轻小说' and '短篇').
     */
    public function qidianBookCate($cateName)
    {
        // Labels are compared loosely (non-strict in_array), matching the
        // original switch semantics. Group order matters for that reason.
        $groups = [
            [6, ['玄幻']],
            [8, ['奇幻']],
            [5, ['武侠']],
            [3, ['仙侠', '仙侠奇缘']],
            [1, ['现实生活', '都市', '传记', '现实', '悬疑', '悬疑推理']],
            [13, ['军事']],
            [10, ['历史']],
            [12, ['游戏', '游戏竞技']],
            [4, ['体育']],
            [2, ['科幻']],
            [17, ['现代言情']],
            [18, ['玄幻言情']],
            [19, ['科幻空间']],
            [21, ['古代言情']],
            [22, ['浪漫青春', '青春文学', '文学']],
        ];
        foreach ($groups as $group) {
            if (in_array($cateName, $group[1])) {
                return $group[0];
            }
        }

        return -1;
    }

    /**
     * Find an author by name, creating the record when it does not exist.
     *
     * @param string $authorName Author name to look up.
     * @return BookAuthor The existing or newly created author.
     */
    public function checkAuthorSave($authorName)
    {
        $existing = BookAuthor::where('author_name', $authorName)->first();
        if ($existing) {
            return $existing;
        }

        dump('新增作者：'.$authorName);
        $created = new BookAuthor();
        $created->author_name = $authorName;
        $created->author_level = '作家';
        $created->save();

        return $created;
    }

































































    /**
     * Scheduled book crawl task (second variant) — currently a no-op because
     * its only statement is commented out.
     */
    public function spider2(){
//        # Crawl books (disabled); the trailing 6 was annotated as the fantasy-novel category.
//        # NOTE(review): the arguments look like they were meant for spiderBook1($url, $cateId) — confirm.
//        $this->spiderBook('http://www.biquge.info/list/1_[PAGE].html', 6);
    }

    /**
     * Walk a paginated book list and log every entry that carries a URL.
     *
     * The page count is read from the first list page; every page is then
     * fetched once, parsed, and its rows are written to the log. The method
     * sleeps three seconds after each page.
     *
     * Compared with the earlier draft of this method, each page is downloaded
     * once instead of three times, the HTTP header() call (which does not
     * belong in a service) is gone, and discarded intermediate results were
     * removed. The logged output is unchanged.
     *
     * @param string $url    List URL containing the SpiderTask::PAGE placeholder.
     * @param mixed  $cateId Category id; currently unused.
     */
    public function spiderBook1($url, $cateId){
        # 1: determine the number of list pages from page 1
        $pageUrl = str_replace(SpiderTask::PAGE, 1, $url);
        $html = SpiderService::getHtml($pageUrl);
        $list_page_param = [
            'rule'=> "$('#pagelink .last')",
            'type'=> "text",
        ];
        $maxPage = SpiderService::formatPageParam($html, $list_page_param);

        // Extraction rules are identical for every page, so build them once.
        $list_param = [
            ['key'=> 'url', 'rule'=> "$('#newscontent .l ul li .s2 a')", 'type'=> 'href'],
            ['key'=> 'title', 'rule'=> "$('#newscontent .l ul li .s2 a')", 'type'=> 'text'],
            ['key'=> 'author', 'rule'=> "$('#newscontent .l ul li .s4')", 'type'=> 'text'],
        ];

        # 2: iterate over the list pages
        for ($p = 1; $p <= $maxPage; $p++){
            $curListPageUrl = str_replace(SpiderTask::PAGE, $p, $url);
            $curListHtml = SpiderService::getHtml($curListPageUrl);
            $listDatas = SpiderService::testListQuery($curListHtml, $list_param);
            foreach ($listDatas as $data){
                if (isset($data['url'])){
                    \Log::info($data);
                }
            }
            sleep(3);
        }
    }

    /**
     * Scheduled crawl task: run the highest-priority due spider task.
     *
     * Picks the active task (status 1) whose next_spider_time has passed,
     * preferring the highest priority, and hands it to spiderItem(). When no
     * task is due this is logged. Exceptions are logged and swallowed.
     */
    public function spider1()
    {
        try {
            set_time_limit(0);
            ini_set('memory_limit', -1);

            $dueTask = SpiderTask::with('listConfig')
                ->where('status', 1)
                ->where('next_spider_time', '<', Carbon::now())
                ->orderBy('priority', 'desc')
                ->first();

            if ($dueTask) {
                // Rescheduling of recurring tasks is currently disabled:
                // if ($dueTask->freq > 0){
                //     $dueTask->cur_spider_time = Carbon::now();
                //     $dueTask->next_spider_time = Carbon::now()->addHour(SpiderTask::$freqHours[$dueTask->freq]);
                //     $dueTask->save();
                // }
                $this->spiderItem($dueTask);
            } else {
                \Log::info('暂无任务');
            }
        } catch (\Exception $e) {
            \Log::info($e->getMessage());
        }
    }

    /**
     * Run a single spider task: walk its list pages and crawl unseen details.
     *
     * For every list page the configured list rules are applied; each row
     * with a URL is crawled through spiderDetail() unless its detail URL is
     * already stored, in which case the rest of that page is skipped.
     * A three second pause follows every crawled detail page. Exceptions are
     * logged and swallowed.
     *
     * @param SpiderTask $task Task with its listConfig relation.
     */
    public function spiderItem($task)
    {
        try {
            $listConfig = $task->listConfig;
            $list_param = json_decode($listConfig->list_url_content, true);
            $missingConfig = $listConfig->list_url == '' || $listConfig->list_url_content == '';
            if ($missingConfig) {
                \Log::info('缺少参数');
                return;
            }

            $listPageUrl = $listConfig->list_url;
            $firstPageUrl = str_replace(SpiderTask::PAGE, 1, $listPageUrl);
            $firstPageHtml = SpiderService::getHtml($firstPageUrl, $task->cookie);

            # 1: number of list pages
            $maxPage = SpiderService::formatPageParam($firstPageHtml, [
                'rule' => $listConfig->list_page_matche,
                'type' => $listConfig->list_page_attr,
            ]);

            // Prefix prepended to the extracted detail links:
            // 1 = scheme + host, 2 = URL up to its last '/', otherwise none.
            $domain = SpiderService::formatUrlDomain($firstPageUrl);
            $urlPrefix = isset($list_param[0]['url_prefix']) ? $list_param[0]['url_prefix'] : '';
            $preUrl = $urlPrefix == 1
                ? $domain['host']
                : ($urlPrefix == 2 ? $domain['p_url'] : '');

            # 2: iterate over the list pages
            for ($p = 1; $p <= $maxPage; $p++) {
                $curListPageUrl = str_replace(SpiderTask::PAGE, $p, $listPageUrl);
                $curListHtml = SpiderService::getHtml($curListPageUrl, $task->cookie);
                $rows = SpiderService::testListQuery($curListHtml, $list_param);
                if (!($rows->count() > 0)) {
                    continue;
                }

                foreach ($rows as $row) {
                    if (!isset($row['url'])) {
                        continue;
                    }
                    $detailUrl = $preUrl.$row['url'];
                    if (SpiderArticleContent::where('detail_url', $detailUrl)->first()) {
                        // Known article reached: stop processing this page.
                        break;
                    }
                    $this->spiderDetail($row, $detailUrl, $task, $listConfig);
                    sleep(3);
                }
            }
        } catch (\Exception $e) {
            \Log::info($e->getMessage());
        }
    }

    /**
     * Crawl one detail page and store it as a SpiderArticleContent row.
     *
     * The list configuration is extended with the paging and data rules for
     * the detail page and passed to detail(). Nothing is stored when detail()
     * yields no article — that is, an empty result or its
     * ['code' => ..., 'message' => ...] error payload, which carries no
     * 'content' key.
     *
     * @param array  $listData   Row extracted from the list page (url, category, cover ...).
     * @param string $detailUrl  Absolute detail page URL.
     * @param mixed  $task       Spider task (id and cookie are read).
     * @param mixed  $listConfig List configuration model of the task.
     */
    public function spiderDetail($listData, $detailUrl, $task, $listConfig){
        $listConfig['page_param'] = [
            'rule'=> $listConfig->detail_page_matche,
            'type'=> $listConfig->detail_page_attr,
        ];
        $listConfig['data_param'] = json_decode($listConfig->detail_url_content, true);
        $result = self::detail($detailUrl, $listConfig->toArray(), $task->cookie);
        // The error payload of detail() is a non-empty array and therefore
        // truthy; checking for the 'content' key keeps it out of the database.
        if (!$result || !array_key_exists('content', $result)){
            return;
        }
        // Category: list page value first, then detail page value, else empty.
        $article_category = $listData['category'] ?? ($result['category'] ?? '');
        $detail = [
            'task_id'=> $task->id,
            'task_config_id'=> $listConfig->id,
            'list_url'=> $listData['url'] ?? '',
            'detail_url'=> $detailUrl,
            'content'=> $result['content'] ?? '',
            'title'=> $result['title'] ?? '',
            'table_template'=> $result['table_template'] ?? '',
            'published_at'=> $result['published_at'] ?? date('Y-m-d'),
            'article_category'=> $article_category,
            'cover'=> $listData['cover'] ?? '',
        ];
        SpiderArticleContent::create($detail);
    }

    /**
     * Fetch and assemble the data of one detail page.
     *
     * When 'detail_use_page' is 1 the article may span several pages: the
     * first page supplies the metadata and the content of pages 2..max
     * (built from 'fenye_url') is appended. Otherwise a single request is
     * made. The merged content is cleaned with FunctionService::formatHtml().
     *
     * @param string $pageUrl      Detail page URL.
     * @param array  $detailParams Configuration; reads detail_use_page, page_param,
     *                             data_param, fenye_url and table_template.
     * @param mixed  $cookie       Cookie passed through to the HTTP helper.
     * @return array One of:
     *               - [title, category, published_at, content, table_template] on success,
     *               - ['code' => 201, 'message' => ...] when the non-paged request yields no rows,
     *               - [] when no content was extracted.
     */
    public static function detail($pageUrl, $detailParams, $cookie){
        $maxPage = 1;
        $content = '';
        $data = [];
        $purifier = ['HTML.Allowed' => 'div,p,img[width|src]', 'CSS.AllowedProperties' => ''];
        # 1: check whether the article is paginated
        if ($detailParams['detail_use_page'] == 1){
            $html = SpiderService::getHtml($pageUrl, $cookie);
            $maxPage = SpiderService::formatPageParam($html, $detailParams['page_param']);
            $curpageData = SpiderService::articleDetailQuery($html, $detailParams['data_param']);
            if ($curpageData->count() > 0){
                $data = $curpageData[0];
                $content .= $data['content'] ?? '';
            }
            # Paginated: request pages 2..max and append their content
            for ($p = 2; $p <= $maxPage; $p++){
                $pageUrl = str_replace(SpiderTask::PAGE, $p, $detailParams['fenye_url']);
                $fenyeContent = SpiderService::articleDetailQuery(SpiderService::getHtml($pageUrl, $cookie), $detailParams['data_param']);
                if ($fenyeContent->count() > 0 && isset($fenyeContent[0]['content'])){
                    $content .= $fenyeContent[0]['content'];
                }
            }
        }else{
            $datas = SpiderService::articleDetailQuery(SpiderService::getHtml($pageUrl, $cookie), $detailParams['data_param']);
            if ($datas->count() == 0)  return  ['code'=> 201, 'message'=> '数据为空'];
            $data = $datas[0];
            // A row without a content rule is treated as "no content".
            $content = $data['content'] ?? '';
        }
        if ($content == ''){
            return [];
        }
        $formatContent = FunctionService::formatHtml($content, $purifier);
        if (isset($data['published_at']) && strtotime($data['published_at'])){
            $published_at = $data['published_at'];
        }else{
            $published_at = date('Y-m-d');
        }
        return [
            // The title may be absent when the first page produced no row or
            // no title rule is configured; default instead of failing.
            'title'=> $data['title'] ?? '',
            'category'=> $data['category'] ?? '',
            'published_at'=> $published_at,
            'content'=> $formatContent,
            'table_template'=> isset($detailParams['table_template']) ? $detailParams['table_template'] : 'article',
        ];
    }



    /**
     * Download a page and return its markup converted to UTF-8.
     *
     * Request headers come from FunctionService::setHeader(). The encoding is
     * guessed from a fixed candidate list; when no candidate matches, the
     * markup is returned unconverted instead of passing an invalid source
     * encoding to mb_convert_encoding().
     *
     * @param string $url     Page URL.
     * @param string $cookie  Cookie forwarded to FunctionService::setHeader().
     * @param bool   $is_roxy Forwarded to FunctionService::setHeader().
     * @return string
     */
    public static function getHtml($url, $cookie = '', $is_roxy = false){
        $headers = FunctionService::setHeader($url, $cookie, $is_roxy);
        $html = QueryList::get($url, [], $headers)->removeHead()->getHtml();
        $encode = mb_detect_encoding($html, ['ASCII', 'GB2312', 'GBK', 'UTF-8', 'BIG5']);
        if ($encode === false){
            // Encoding could not be determined; leave the bytes untouched.
            return $html;
        }
        return mb_convert_encoding($html, 'UTF-8', $encode);
    }

    /**
     * Determine the highest page number of a paginated listing.
     *
     * A rule containing '$' is treated as an element selector and evaluated
     * with QueryList; any other rule is used as a case-insensitive regular
     * expression whose first capture group holds the page count.
     *
     * The result is always an integer >= 1. Extracted text that is not a
     * number (for example a link label) counts as a single page, so the value
     * is safe to use as a loop bound.
     *
     * @param string $html      Markup of a list/detail page.
     * @param array  $pageRules ['rule' => selector or regex, 'type' => attribute to read].
     * @return int
     */
    public static function formatPageParam($html, $pageRules)
    {
        if ($pageRules['rule'] == '') return 1;
        if (strpos($pageRules['rule'], '$') !== false) {
            # Contains '$': element selector
            $pageSizeRules = [
                'maxPage' => [$pageRules['rule'], $pageRules['type']],
            ];
            $datas = QueryList::html($html)->rules($pageSizeRules)->query()->getData();
            $first = $datas->first();
            $maxPage = $first ? ($first['maxPage'] ?? 1) : 1;
        }else{
            # No '$': regular expression; '/' is escaped because it is the delimiter
            $rule = str_replace('/', '\/', $pageRules['rule']);
            $matched = preg_match('/'.$rule.'/i', $html, $match);
            $maxPage = ($matched && isset($match[1])) ? $match[1] : 1;
        }
        return max(1, (int) $maxPage);
    }

    /**
     * Apply list page rules to markup and return the extracted rows.
     *
     * Entries with an empty 'rule' are ignored.
     *
     * @param string $html       List page markup.
     * @param array  $listParams Entries of the form ['key' => ..., 'rule' => ..., 'type' => ...].
     * @return mixed Result of QueryList's getData().
     */
    public static function testListQuery($html, $listParams)
    {
        $rules = [];
        foreach ($listParams as $param) {
            if ($param['rule'] == '') {
                continue;
            }
            $rules[$param['key']] = [$param['rule'], $param['type']];
        }

        return QueryList::html($html)->rules($rules)->query()->getData();
    }

    /**
     * Apply detail page rules to markup and return the extracted rows.
     *
     * Entries with an empty 'rule' are ignored.
     *
     * @param string $html       Detail page markup.
     * @param array  $dataParams Entries of the form ['key' => ..., 'rule' => ..., 'type' => ...].
     * @return mixed Result of QueryList's getData().
     */
    public static function articleDetailQuery($html, $dataParams)
    {
        $selectorMap = [];
        foreach ($dataParams as $entry) {
            $hasRule = $entry['rule'] != '';
            if ($hasRule) {
                $selectorMap[$entry['key']] = [$entry['rule'], $entry['type']];
            }
        }
        $query = QueryList::html($html)->rules($selectorMap)->query();

        return $query->getData();
    }

    /**
     * Split a page URL into the prefixes used to absolutise extracted links.
     *
     * - 'host'  : scheme and host of the URL (port and path are not included).
     * - 'p_url' : the URL itself, or — when it contains the '[PAGE]'
     *             placeholder — everything before its last '/'.
     *
     * parse_url() does not throw, so the URL is validated explicitly: when it
     * cannot be parsed or lacks a scheme or host, both entries fall back to
     * the unchanged input.
     *
     * @param string $pageUrl URL to analyse.
     * @return array ['host' => string, 'p_url' => string]
     */
    public static function formatUrlDomain($pageUrl){
        $parts = parse_url($pageUrl);
        if (!is_array($parts) || !isset($parts['scheme'], $parts['host'])){
            return ['host'=> $pageUrl, 'p_url'=> $pageUrl];
        }
        if (strpos($pageUrl, '[PAGE]') === false){
            $p_url = $pageUrl;
        }else{
            $p_url = substr($pageUrl, 0, strrpos($pageUrl, '/'));
        }
        return ['host'=> $parts['scheme'].'://'.$parts['host'], 'p_url'=> $p_url];
    }
}